#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')
library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(plotlyutils)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(Rtsne)
library(ClusterR)
library(DESeq2) ; library(biomaRt)
library(knitr)
Load preprocessed dataset (preprocessing code in 20_03_02_data_preprocessing.Rmd)
# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
mutate('ID'=as.character(ensembl_gene_id)) %>%
dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
mutate('Neuronal'=1)
# Update DE_info with Neuronal information
DE_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(GO_neuronal, by='ID') %>%
mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
mutate(significant=padj<0.05 & !is.na(padj))
rm(GO_annotations)
plot_data = data.frame('ID'=rownames(datExpr), 'Mean'=rowMeans(datExpr))
p1 = ggplotly(plot_data %>% ggplot(aes(Mean)) + geom_density(color='#0099cc', fill='#0099cc', alpha=0.3) +
scale_x_log10() + theme_minimal())
plot_data = data.frame('ID'=colnames(datExpr), 'Mean'=colMeans(datExpr))
p2 = ggplotly(plot_data %>% ggplot(aes(Mean)) + geom_density(color='#0099cc', fill='#0099cc', alpha=0.3) +
theme_minimal() + ggtitle('Mean expression density by Gene (left) and by Sample (right)'))
subplot(p1, p2, nrows=1)
rm(p1, p2, plot_data)
The two groups of genes seem to be partially characterised by genes with Neuronal function
In general, the autism group has a bigger mean but this time the control group seems to have a wider variance
plot_data = data.frame('ID'=rownames(datExpr), 'Mean'=rowMeans(datExpr)) %>%
left_join(GO_neuronal, by='ID') %>% mutate('Neuronal'=ifelse(is.na(Neuronal),F,T))
p1 = plot_data %>% ggplot(aes(Mean, color=Neuronal, fill=Neuronal)) + geom_density(alpha=0.3) +
scale_x_log10() + theme_minimal() + theme(legend.position='bottom') +
ggtitle('Mean expression density by gene')
plot_data = data.frame('ID'=colnames(datExpr), 'Mean'=colMeans(datExpr)) %>%
mutate('ID'=substring(ID,2)) %>% left_join(datMeta, by=c('ID'='Dissected_Sample_ID'))
p2 = plot_data %>% ggplot(aes(Mean, color=Diagnosis, fill=Diagnosis)) + geom_density(alpha=0.3) +
theme_minimal() + theme(legend.position='bottom') +
ggtitle('Mean expression density by Sample')
grid.arrange(p1, p2, nrow=1)
rm(GO_annotations, plot_data, p1, p2)
In general there doesn’t seem to be a lot of variance in mean expression between autism and control samples by gene.
plot_data = data.frame('ID'=rownames(datExpr),
'ASD'=rowMeans(datExpr[,datMeta$Diagnosis=='ASD']),
'CTL'=rowMeans(datExpr[,datMeta$Diagnosis!='ASD']))
plot_data %>% ggplot(aes(ASD,CTL)) + geom_point(alpha=0.1, color='#0099cc') +
geom_abline(color='gray') + ggtitle('Mean expression ASD vs CTL') + theme_minimal()
There doesn’t seem to be a noticeable difference between mean expression by gene between Diagnosis groups
Samples with autism tend to have a narrower dispersion of mean expression with higher values than the control group (as we had already seen above)
plot_data = rbind(data.frame('Mean'=rowMeans(datExpr[,datMeta$Diagnosis=='ASD']), 'Diagnosis'='ASD'),
data.frame('Mean'=rowMeans(datExpr[,datMeta$Diagnosis!='ASD']), 'Diagnosis'='CTL')) %>%
mutate('Diagnosis'=factor(Diagnosis, levels=c('CTL','ASD')))
p1 = ggplotly(plot_data %>% ggplot(aes(Mean, color=Diagnosis, fill=Diagnosis)) +
geom_density(alpha=0.3) + scale_x_log10() + theme_minimal())
plot_data = rbind(data.frame('Mean'=colMeans(datExpr[,datMeta$Diagnosis=='ASD']), 'Diagnosis'='ASD'),
data.frame('Mean'=colMeans(datExpr[,datMeta$Diagnosis!='ASD']), 'Diagnosis'='CTL')) %>%
mutate('Diagnosis'=factor(Diagnosis, levels=c('CTL','ASD')))
p2 = ggplotly(plot_data %>% ggplot(aes(Mean, color=Diagnosis, fill=Diagnosis)) +
geom_density(alpha=0.3) + theme_minimal() +
ggtitle('Mean expression by Gene (left) and by Sample (right) grouped by Diagnosis'))
subplot(p1, p2, nrows=1)
rm(p1, p2, plot_data)
The first principal separates perfectly the two diagnosis
pca = datExpr %>% t %>% prcomp
plot_data = data.frame('ID'=colnames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
mutate('ID'=substring(ID,2)) %>% left_join(datMeta, by=c('ID'='Dissected_Sample_ID')) %>%
dplyr::select('ID','PC1','PC2','Diagnosis') %>%
mutate('Diagnosis'=factor(Diagnosis, levels=c('CTL','ASD')))
plot_data %>% ggplot(aes(PC1, PC2, color=Diagnosis)) + geom_point() + theme_minimal() + ggtitle('PCA') +
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)'))
rm(pca, plot_data)
Looks exactly the same as the PCA visualisation, just inverting the x axis
d = datExpr %>% t %>% dist
fit = cmdscale(d, k=2)
plot_data = data.frame('ID'=colnames(datExpr), 'C1'=fit[,1], 'C2'=fit[,2]) %>%
mutate('ID'=substring(ID,2)) %>% left_join(datMeta, by=c('ID'='Dissected_Sample_ID')) %>%
dplyr::select('C1','C2','Diagnosis') %>%
mutate('Diagnosis'=factor(Diagnosis, levels=c('CTL','ASD')))
plot_data %>% ggplot(aes(C1, C2, color=Diagnosis)) + geom_point() + theme_minimal() + ggtitle('MDS')
rm(d, fit, plot_data)
Higher perplexities seem to capture the difference between diagnosis better. You can separate the diagonsis almost perfectly with a line using the highest perplexity
#perplexities = c(2,5,10,25)
perplexities = c(1,2,3,4)
ps = list()
for(i in 1:length(perplexities)){
set.seed(123)
tsne = datExpr %>% t %>% Rtsne(perplexity=perplexities[i])
plot_data = data.frame('ID'=colnames(datExpr), 'C1'=tsne$Y[,1], 'C2'=tsne$Y[,2]) %>%
mutate('ID'=substring(ID,2)) %>% left_join(datMeta, by=c('ID'='Dissected_Sample_ID')) %>%
dplyr::select('C1','C2','Diagnosis','Subject_ID') %>%
mutate('Diagnosis'=factor(Diagnosis, levels=c('CTL','ASD')))
ps[[i]] = plot_data %>% ggplot(aes(C1, C2, color=Diagnosis)) + geom_point() + theme_minimal() +
ggtitle(paste0('Perplexity=',perplexities[i])) + theme(legend.position='none')
}
grid.arrange(grobs=ps, nrow=2)
rm(ps, perplexities, tsne, i)
First Principal Component explains over 99% of the total variance
There’s a really strong correlation between the mean expression of a gene and the 1st principal component
pca = datExpr %>% prcomp
plot_data = data.frame( 'PC1' = pca$x[,1], 'PC2' = pca$x[,2], 'MeanExpr'=rowMeans(datExpr))
plot_data %>% ggplot(aes(PC1, PC2, color=MeanExpr)) + geom_point(alpha=0.3) + theme_minimal() +
scale_color_viridis() + ggtitle('PCA') +
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)'))
rm(pca, plot_data)
Higher perplexities capture a cleaner visualisation of the data ordered by mean expression, in a similar (although not as linear) way to PCA
perplexities = c(1,2,5,10,50,100)
ps = list()
for(i in 1:length(perplexities)){
tsne = read.csv(paste0('./../Visualisations/tsne_perplexity_',perplexities[i],'.csv'))
plot_data = data.frame('C1'=tsne[,1], 'C2'=tsne[,2], 'MeanExpr'=rowMeans(datExpr))
ps[[i]] = plot_data %>% ggplot(aes(C1, C2, color=MeanExpr)) + geom_point(alpha=0.5) + theme_minimal() +
scale_color_viridis() + ggtitle(paste0('Perplexity = ',perplexities[i])) + theme(legend.position='none')
}
grid.arrange(grobs=ps, nrow=2)
rm(perplexities, ps, tsne, i)
cat(paste0(sum(is.na(DE_info$padj)), ' (~', round(100*(sum(is.na(DE_info$padj))/nrow(DE_info))),
'%) of the p-values couldn\'t be calculated'))
## 960 (~6%) of the p-values couldn't be calculated
table(DE_info$padj<0.05, useNA='ifany')
##
## FALSE TRUE <NA>
## 14308 1222 960
p = DE_info %>% ggplot(aes(log2FoldChange, padj, color=significant)) + geom_point(alpha=0.2) +
scale_y_sqrt() + xlab('log2 Fold Change') + ylab('Adjusted p-value') + theme_minimal()
ggExtra::ggMarginal(p, type = 'density', color='gray', fill='gray', size=10)
rm(p)
There is a clear negative relation between lfc and mean expression, for both differentially expressed and not differentially expressed groups of genes
The relation is strongest for genes with low levels of expression
The p-value of the genes with the lowest levels of expression couldn’t be calculated. DESeq2 does this with genes where it considers the level of expression is not high enough to get robust results
plot_data = data.frame('ID'=rownames(datExpr), 'meanExpr'=rowMeans(datExpr)) %>% left_join(DE_info, by='ID') %>%
mutate('statisticallySignificant' = ifelse(is.na(padj),NA, ifelse(padj<0.05, TRUE, FALSE)),
'alpha' = ifelse(padj>0.05 | is.na(padj), 0.1, 0.5))
plot_data %>% ggplot(aes(meanExpr, abs(log2FoldChange), color=statisticallySignificant)) +
geom_point(alpha = plot_data$alpha) + geom_smooth(method='lm') +
theme_minimal() + scale_y_sqrt() + theme(legend.position = 'bottom') +
xlab('Mean Expression') + ylab('abs(lfc)') + ggtitle('Log fold change by level of expression')
When filtering for differential expression, the points separate into two clouds depending on whether they are over or underexpressed
The top cloud corresponds to the underexpressed genes and the bottom to the overexpressed ones
datExpr_DE = datExpr[DE_info$significant,]
pca = datExpr_DE %>% prcomp
plot_data = cbind(data.frame('PC1'=pca$x[,1], 'PC2'=pca$x[,2]), DE_info[DE_info$significant==TRUE,])
pos_zero = -min(plot_data$log2FoldChange)/(max(plot_data$log2FoldChange)-min(plot_data$log2FoldChange))
p = plot_data %>% ggplot(aes(PC1, PC2, color=log2FoldChange)) + geom_point(alpha=0.5) +
scale_color_gradientn(colours=c('#F8766D','#faa49e','white','#00BFC4','#009499'),
values=c(0, pos_zero-0.05, pos_zero, pos_zero+0.05, 1)) +
theme_minimal() + ggtitle('
PCA of differentially expressed genes') + # This is on purpose, PDF doesn't save well without this white space (?)
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],1),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],1),'%)')) + theme(legend.position = 'bottom')
ggExtra::ggMarginal(p, type='density', color='gray', fill='gray', size=10)
rm(pos_zero, p)
Separating the genes into these two groups: Salmon: under-expressed, aqua: over-expressed
plot_data = plot_data %>% mutate('Group'=ifelse(log2FoldChange>0,'overexpressed','underexpressed')) %>%
mutate('Group' = factor(Group, levels=c('underexpressed','overexpressed')))
rm(pca)
List of top DE genes
# Get genes names
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=plot_data$ID, mart=mart) %>%
rename(external_gene_id = 'gene_name', ensembl_gene_id = 'ID')
## Cache found
plot_data = plot_data %>% left_join(gene_names, by='ID') %>% arrange(-abs(log2FoldChange)) %>%
top_n(50, wt=log2FoldChange)
kable(plot_data %>% dplyr::select(ID, gene_name, log2FoldChange, padj, Neuronal, Group) %>%
arrange(padj))
| ID | gene_name | log2FoldChange | padj | Neuronal | Group |
|---|---|---|---|---|---|
| ENSG00000188064 | WNT7B | 1.624086 | 0.0000307 | 1 | overexpressed |
| ENSG00000146250 | PRSS35 | 2.342708 | 0.0000323 | 0 | overexpressed |
| ENSG00000158966 | CACHD1 | 1.294322 | 0.0000656 | 0 | overexpressed |
| ENSG00000153993 | SEMA3D | 1.313359 | 0.0001418 | 0 | overexpressed |
| ENSG00000139874 | SSTR1 | 1.384499 | 0.0001817 | 0 | overexpressed |
| ENSG00000147402 | GABRQ | 1.789607 | 0.0004998 | 0 | overexpressed |
| ENSG00000103528 | SYT17 | 1.309546 | 0.0006371 | 0 | overexpressed |
| ENSG00000203685 | C1orf95 | 1.633768 | 0.0010367 | 0 | overexpressed |
| ENSG00000169432 | SCN9A | 1.466487 | 0.0010778 | 1 | overexpressed |
| ENSG00000136883 | KIF12 | 1.395399 | 0.0013233 | 0 | overexpressed |
| ENSG00000011638 | TMEM159 | 1.421262 | 0.0014462 | 0 | overexpressed |
| ENSG00000178531 | CTXN1 | 1.245119 | 0.0015140 | 0 | overexpressed |
| ENSG00000204060 | FOXO6 | 1.251705 | 0.0019624 | 0 | overexpressed |
| ENSG00000105048 | TNNT1 | 1.313702 | 0.0032848 | 0 | overexpressed |
| ENSG00000130055 | GDPD2 | 1.474066 | 0.0035667 | 0 | overexpressed |
| ENSG00000091831 | ESR1 | 1.725189 | 0.0036514 | 0 | overexpressed |
| ENSG00000175206 | NPPA | 1.621458 | 0.0036514 | 0 | overexpressed |
| ENSG00000183287 | CCBE1 | 1.673627 | 0.0037492 | 0 | overexpressed |
| ENSG00000198883 | PNMA5 | 1.991281 | 0.0040520 | 0 | overexpressed |
| ENSG00000088882 | CPXM1 | 1.609025 | 0.0047786 | 0 | overexpressed |
| ENSG00000109758 | HGFAC | 1.387459 | 0.0048862 | 0 | overexpressed |
| ENSG00000139211 | AMIGO2 | 1.507520 | 0.0051927 | 0 | overexpressed |
| ENSG00000101198 | NKAIN4 | 1.289720 | 0.0051927 | 0 | overexpressed |
| ENSG00000272636 | DOC2B | 1.263649 | 0.0052951 | 0 | overexpressed |
| ENSG00000180332 | KCTD4 | 1.613266 | 0.0072715 | 0 | overexpressed |
| ENSG00000181085 | MAPK15 | 2.141071 | 0.0086573 | 0 | overexpressed |
| ENSG00000101327 | PDYN | 1.280135 | 0.0087181 | 0 | overexpressed |
| ENSG00000183729 | NPBWR1 | 1.384369 | 0.0093830 | 0 | overexpressed |
| ENSG00000152580 | IGSF10 | 1.273370 | 0.0116095 | 0 | overexpressed |
| ENSG00000163520 | FBLN2 | 1.274226 | 0.0122027 | 0 | overexpressed |
| ENSG00000187848 | P2RX2 | 3.173400 | 0.0125335 | 1 | overexpressed |
| ENSG00000050628 | PTGER3 | 1.682533 | 0.0130087 | 0 | overexpressed |
| ENSG00000167754 | KLK5 | 1.625970 | 0.0151580 | 0 | overexpressed |
| ENSG00000102854 | MSLN | 1.764830 | 0.0156656 | 0 | overexpressed |
| ENSG00000113494 | PRLR | 1.285302 | 0.0160990 | 0 | overexpressed |
| ENSG00000269430 | LRRC3DN | 2.300733 | 0.0163146 | 0 | overexpressed |
| ENSG00000197901 | SLC22A6 | 1.368908 | 0.0174812 | 0 | overexpressed |
| ENSG00000146197 | SCUBE3 | 1.234709 | 0.0211671 | 0 | overexpressed |
| ENSG00000205592 | MUC19 | 1.914294 | 0.0219311 | 0 | overexpressed |
| ENSG00000101188 | NTSR1 | 1.236207 | 0.0238878 | 0 | overexpressed |
| ENSG00000163395 | IGFN1 | 1.509987 | 0.0244847 | 0 | overexpressed |
| ENSG00000166869 | CHP2 | 2.303129 | 0.0275550 | 0 | overexpressed |
| ENSG00000198840 | MT-ND3 | 1.454043 | 0.0277333 | 0 | overexpressed |
| ENSG00000186439 | TRDN | 1.409692 | 0.0315874 | 0 | overexpressed |
| ENSG00000112539 | C6orf118 | 1.771346 | 0.0322223 | 0 | overexpressed |
| ENSG00000172554 | SNTG2 | 1.334708 | 0.0328585 | 0 | overexpressed |
| ENSG00000105641 | SLC5A5 | 1.635872 | 0.0354484 | 0 | overexpressed |
| ENSG00000196932 | TMEM26 | 1.308970 | 0.0399796 | 0 | overexpressed |
| ENSG00000117650 | NEK2 | 1.491603 | 0.0402651 | 0 | overexpressed |
| ENSG00000143867 | OSR1 | 1.703485 | 0.0422852 | 0 | overexpressed |
Not only we have very few DE genes, but we lose most of them from very low log Fold Change thresholds
fc_list = seq(1, 1.3, 0.01)
#fc_list = c(seq(1,1.01, 0.002), seq(1.01, 1.04, 0.01))
n_genes = nrow(datExpr)
# Calculate PCAs
datExpr_pca_samps = datExpr %>% data.frame %>% t %>% prcomp(scale.=TRUE)
datExpr_pca_genes = datExpr %>% data.frame %>% prcomp(scale.=TRUE)
# Initialice DF to save PCA outputs
pcas_samps = datExpr_pca_samps$x %>% data.frame %>% dplyr::select(PC1:PC2) %>%
mutate('ID'=colnames(datExpr), 'fc'=-1, PC1=scale(PC1), PC2=scale(PC2))
pcas_genes = datExpr_pca_genes$x %>% data.frame %>% dplyr::select(PC1:PC2) %>%
mutate('ID'=rownames(datExpr), 'fc'=-1, PC1=scale(PC1), PC2=scale(PC2))
pca_samps_old = pcas_samps
pca_genes_old = pcas_genes
for(fc in fc_list){
# Recalculate DE_info with the new threshold (p-values change) an filter DE genes
DE_genes = results(dds, lfcThreshold=log2(fc), altHypothesis='greaterAbs') %>% data.frame %>%
mutate('ID'=rownames(.)) %>% filter(padj<0.05)
datExpr_DE = datExpr %>% data.frame %>% filter(rownames(.) %in% DE_genes$ID)
n_genes = c(n_genes, nrow(DE_genes))
# Calculate PCAs
datExpr_pca_samps = datExpr_DE %>% t %>% prcomp(scale.=TRUE)
datExpr_pca_genes = datExpr_DE %>% prcomp(scale.=TRUE)
# Create new DF entries
pca_samps_new = datExpr_pca_samps$x %>% data.frame %>% dplyr::select(PC1:PC2) %>%
mutate('ID'=colnames(datExpr), 'fc'=fc, PC1=scale(PC1), PC2=scale(PC2))
pca_genes_new = datExpr_pca_genes$x %>% data.frame %>% dplyr::select(PC1:PC2) %>%
mutate('ID'=DE_genes$ID, 'fc'=fc, PC1=scale(PC1), PC2=scale(PC2))
# Change PC sign if necessary
if(cor(pca_samps_new$PC1, pca_samps_old$PC1)<0) pca_samps_new$PC1 = -pca_samps_new$PC1
if(cor(pca_samps_new$PC2, pca_samps_old$PC2)<0) pca_samps_new$PC2 = -pca_samps_new$PC2
if(cor(pca_genes_new[pca_genes_new$ID %in% pca_genes_old$ID,]$PC1,
pca_genes_old[pca_genes_old$ID %in% pca_genes_new$ID,]$PC1)<0){
pca_genes_new$PC1 = -pca_genes_new$PC1
}
if(cor(pca_genes_new[pca_genes_new$ID %in% pca_genes_old$ID,]$PC2,
pca_genes_old[pca_genes_old$ID %in% pca_genes_new$ID,]$PC2 )<0){
pca_genes_new$PC2 = -pca_genes_new$PC2
}
pca_samps_old = pca_samps_new
pca_genes_old = pca_genes_new
# Update DFs
pcas_samps = rbind(pcas_samps, pca_samps_new)
pcas_genes = rbind(pcas_genes, pca_genes_new)
}
# Add Diagnosis/SFARI score information
pcas_samps = pcas_samps %>% mutate('ID'=substring(ID,2)) %>%
left_join(datMeta, by=c('ID'='Dissected_Sample_ID')) %>%
dplyr::select(ID, PC1, PC2, fc, Diagnosis, Brain_lobe)
# pcas_genes = pcas_genes %>% left_join(SFARI_genes, by='ID') %>%
# mutate('score'=as.factor(`gene-score`)) %>%
# dplyr::select(ID, PC1, PC2, lfc, score)
# Plot change of number of genes
ggplotly(data.frame('lfc'=log2(fc_list), 'n_genes'=n_genes[-1]) %>% ggplot(aes(x=lfc, y=n_genes)) +
geom_point() + geom_line() + theme_minimal() + xlab('|Log Fold Change|') + ylab('Remaining Genes') +
ggtitle('Remaining genes when modifying filtering threshold'))
rm(fc_list, n_genes, fc, pca_samps_new, pca_genes_new, pca_samps_old, pca_genes_old,
datExpr_pca_samps, datExpr_pca_genes)
Note: PC values get smaller as Log2 fold change increases, so on each iteration the values were scaled so it would be easier to compare between frames
LFC = -1 represents the whole set of genes, without any filtering by differential expression
Filtering out genes that are not differentially expressed (\(H_0:lfc=0\)) separates the two diagnosis groups a bit better than using all the genes
Increasing the LFC threshold doesn’t seem to improve the separation between groups
ggplotly(pcas_samps %>% mutate(abs_lfc=ifelse(fc==-1,-1,round(log2(abs(fc)),3))) %>%
ggplot(aes(PC1, PC2, color=Diagnosis)) + geom_point(aes(frame=abs_lfc, ids=ID)) +
theme_minimal() + ggtitle('Samples PCA plot modifying filtering threshold'))
if(!'fcSign' %in% colnames(pcas_genes)){
pcas_genes = pcas_genes %>% left_join(DE_info, by='ID') %>% mutate(fcSign = ifelse(log2FoldChange>0,'Positive','Negative'))
}
ggplotly(pcas_genes %>% mutate(abs_lfc=ifelse(fc==-1,-1,round(log2(fc),3))) %>%
ggplot(aes(PC1, PC2, color=fcSign)) + geom_point(aes(frame=abs_lfc, ids=ID, alpha=0.1)) +
theme_minimal() + ggtitle('Genes PCA plot modifying |LFC| filtering threshold'))
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] knitr_1.24 biomaRt_2.42.0
## [3] DESeq2_1.26.0 SummarizedExperiment_1.16.1
## [5] DelayedArray_0.12.2 BiocParallel_1.20.1
## [7] matrixStats_0.55.0 Biobase_2.46.0
## [9] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
## [11] IRanges_2.20.2 S4Vectors_0.24.3
## [13] BiocGenerics_0.32.0 ClusterR_1.2.1
## [15] gtools_3.8.1 Rtsne_0.15
## [17] GGally_1.4.0 gridExtra_2.3
## [19] viridis_0.5.1 viridisLite_0.3.0
## [21] RColorBrewer_1.1-2 plotlyutils_0.0.0.9000
## [23] plotly_4.9.2 glue_1.3.1
## [25] reshape2_1.4.3 forcats_0.4.0
## [27] stringr_1.4.0 dplyr_0.8.3
## [29] purrr_0.3.3 readr_1.3.1
## [31] tidyr_1.0.2 tibble_2.1.3
## [33] ggplot2_3.2.1 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.5 Hmisc_4.2-0
## [4] BiocFileCache_1.10.2 plyr_1.8.5 lazyeval_0.2.2
## [7] splines_3.6.0 gmp_0.5-13.6 crosstalk_1.0.0
## [10] digest_0.6.24 htmltools_0.4.0 fansi_0.4.1
## [13] magrittr_1.5 checkmate_1.9.4 memoise_1.1.0
## [16] cluster_2.0.8 annotate_1.64.0 modelr_0.1.5
## [19] askpass_1.1 prettyunits_1.0.2 colorspace_1.4-1
## [22] blob_1.2.1 rvest_0.3.5 rappdirs_0.3.1
## [25] haven_2.2.0 xfun_0.8 crayon_1.3.4
## [28] RCurl_1.95-4.12 jsonlite_1.6 genefilter_1.68.0
## [31] survival_2.44-1.1 gtable_0.3.0 zlibbioc_1.32.0
## [34] XVector_0.26.0 scales_1.1.0 DBI_1.1.0
## [37] miniUI_0.1.1.1 Rcpp_1.0.3 xtable_1.8-4
## [40] progress_1.2.2 htmlTable_1.13.1 foreign_0.8-71
## [43] bit_1.1-15.2 Formula_1.2-3 htmlwidgets_1.5.1
## [46] httr_1.4.1 acepack_1.4.1 farver_2.0.3
## [49] pkgconfig_2.0.3 reshape_0.8.8 XML_3.99-0.3
## [52] nnet_7.3-12 dbplyr_1.4.2 locfit_1.5-9.1
## [55] tidyselect_0.2.5 labeling_0.3 rlang_0.4.4
## [58] later_1.0.0 AnnotationDbi_1.48.0 munsell_0.5.0
## [61] cellranger_1.1.0 tools_3.6.0 cli_2.0.1
## [64] generics_0.0.2 RSQLite_2.2.0 broom_0.5.4
## [67] fastmap_1.0.1 evaluate_0.14 yaml_2.2.0
## [70] bit64_0.9-7 fs_1.3.1 nlme_3.1-139
## [73] mime_0.9 ggExtra_0.9 xml2_1.2.2
## [76] compiler_3.6.0 rstudioapi_0.10 curl_4.3
## [79] reprex_0.3.0 geneplotter_1.64.0 stringi_1.4.6
## [82] highr_0.8 lattice_0.20-38 Matrix_1.2-17
## [85] vctrs_0.2.2 pillar_1.4.3 lifecycle_0.1.0
## [88] data.table_1.12.8 bitops_1.0-6 httpuv_1.5.2
## [91] R6_2.4.1 latticeExtra_0.6-28 promises_1.1.0
## [94] assertthat_0.2.1 openssl_1.4.1 withr_2.1.2
## [97] GenomeInfoDbData_1.2.2 hms_0.5.3 grid_3.6.0
## [100] rpart_4.1-15 rmarkdown_1.14 Cairo_1.5-10
## [103] shiny_1.4.0 lubridate_1.7.4 base64enc_0.1-3